# -*- coding: utf-8 -*-
import scrapy
from lianjia.items import LianjiaItem
from scrapy.http import Request


class QuotesSpider(scrapy.Spider):
    """Scrape second-hand housing (ershoufang) listings from zz.lianjia.com."""

    name = 'quotes'
    allowed_domains = ['zz.lianjia.com']
    # Base listing URL; the page number is appended in start_requests.
    start_urls = ['https://zz.lianjia.com/ershoufang/pg']

    def start_requests(self):
        """Yield one request per listing page, for pages 1 through 100.

        Replaces the original char-by-char URL assembly (base URL split
        into a list and re-joined one character at a time, with debug
        prints) with a direct f-string; the resulting URLs are identical,
        e.g. https://zz.lianjia.com/ershoufang/pg7
        """
        base = self.start_urls[0]
        for page in range(1, 101):  # all 100 listing pages
            yield Request(f'{base}{page}', self.parse)

    def parse(self, response):
        """Extract one LianjiaItem per listing <li> on a results page.

        Any field may be None when a listing lacks the expected markup,
        since extract_first() returns None instead of raising.
        """
        for listing in response.xpath('//div[4]/div[1]/ul/li'):
            item = LianjiaItem()
            item['houseinfo'] = listing.xpath('div/div[1]/a/text()').extract_first()
            item['houseurl'] = listing.xpath('div/div[1]/a/@href').extract_first()
            item['housedizhi'] = listing.xpath('div/div[2]/div/a/text()').extract_first()
            item['houseprice'] = listing.xpath('div/div[6]/div[1]/span/text()').extract_first()
            item['houseperprice'] = listing.xpath('div/div[6]/div[2]/span/text()').extract_first()
            yield item








